The objective of this notebook is to analyze the information published by the Argentinian government on the evolution of the current COVID-19 pandemic.
The dataset used can be found at http://datos.salud.gob.ar/dataset/covid-19-casos-registrados-en-la-republica-argentina/archivo/fd657d02-a33a-498b-a91b-2ef1a68b8d16
It is a CSV file containing one row for each person who was suspected of having COVID-19.
library(tidyr)
library(dplyr)
library(ggplot2)
library(magrittr)
library(leaflet)
library(rgdal)
# Load the case-level dataset (one row per reported case).
# The file contains accented Spanish text (e.g. province names); declaring the
# strings as UTF-8 keeps them from being shown as mojibake and lets them be
# compared correctly against other UTF-8 text later on.
cases <- read.csv("Covid19Casos.csv", encoding = "UTF-8")

# Quick overview of the column names, types and first values.
glimpse(cases)
## Rows: 4,543,501
## Columns: 25
## $ id_evento_caso <int> 1000000, 1000002, 1000003, 1000005, …
## $ sexo <chr> "M", "M", "F", "F", "M", "M", "F", "…
## $ edad <int> 53, 21, 41, 58, 28, 26, 69, 73, 7, 4…
## $ edad_años_meses <chr> "Años", "Años", "Años", "Años", "Año…
## $ residencia_pais_nombre <chr> "Argentina", "Argentina", "Argentina…
## $ residencia_provincia_nombre <chr> "CABA", "Buenos Aires", "CĂ³rdoba", "…
## $ residencia_departamento_nombre <chr> "SIN ESPECIFICAR", "La Matanza", "Ca…
## $ carga_provincia_nombre <chr> "Buenos Aires", "Buenos Aires", "CĂ³r…
## $ fecha_inicio_sintomas <chr> "", "", "2020-05-24", "", "2020-05-3…
## $ fecha_apertura <chr> "2020-06-01", "2020-06-01", "2020-06…
## $ sepi_apertura <int> 23, 23, 23, 23, 23, 23, 23, 23, 23, …
## $ fecha_internacion <chr> "", "", "", "", "", "", "", "", "", …
## $ cuidado_intensivo <chr> "NO", "NO", "NO", "NO", "NO", "NO", …
## $ fecha_cui_intensivo <chr> "", "", "", "", "", "", "", "", "", …
## $ fallecido <chr> "NO", "NO", "NO", "NO", "NO", "NO", …
## $ fecha_fallecimiento <chr> "", "", "", "", "", "", "", "", "", …
## $ asistencia_respiratoria_mecanica <chr> "NO", "NO", "NO", "NO", "NO", "NO", …
## $ carga_provincia_id <int> 6, 6, 14, 50, 6, 34, 2, 6, 2, 82, 2,…
## $ origen_financiamiento <chr> "Privado", "PĂºblico", "Privado", "PĂºâ€¦
## $ clasificacion <chr> "Caso Descartado", "Caso Descartado"…
## $ clasificacion_resumen <chr> "Descartado", "Descartado", "Descart…
## $ residencia_provincia_id <int> 2, 6, 14, 50, 6, 34, 2, 6, 2, 82, 2,…
## $ fecha_diagnostico <chr> "2020-06-09", "2020-06-01", "2020-06…
## $ residencia_departamento_id <int> 0, 427, 14, 49, 515, 35, 0, 260, 7, …
## $ ultima_actualizacion <chr> "2020-12-31", "2020-12-31", "2020-12…
Our first step is to clean up the data.
Some records list ages as high as 1000 years, which is clearly incorrect, so we will only consider people who are 100 years old or younger. We will convert the date strings into a proper date type. We will not consider the cases that were ruled out as not having COVID-19; however, we will keep the suspected cases and treat them as positives.
# Data cleaning ----
# * Drop the cases that were ruled out ("Descartado"); every other
#   classification is kept.
# * Keep only plausible ages: 100 years old or younger. The bound is inclusive
#   so that people aged exactly 100 are not dropped. Rows with a missing age
#   are removed as well, because filter() discards rows whose condition is NA.
#   NOTE(review): `edad` is compared as-is; the `edad_años_meses` column
#   suggests some ages may be expressed in months -- confirm whether those
#   rows need separate handling.
# * Parse the ISO-formatted date strings into Date values; empty strings
#   become NA.
cases <- cases %>%
  filter(
    clasificacion_resumen != "Descartado",
    edad <= 100
  ) %>%
  mutate(
    fecha_apertura = as.Date(fecha_apertura, format = "%Y-%m-%d"),
    fecha_fallecimiento = as.Date(fecha_fallecimiento, format = "%Y-%m-%d")
  )

# Number of cases remaining after the clean-up.
nrow(cases)
## [1] 1976907
# Count the remaining cases per province of residence, largest first.
cases %>%
  group_by(residencia_provincia_nombre) %>%
  summarise(total = n()) %>%
  arrange(desc(total))
## # A tibble: 25 x 2
## residencia_provincia_nombre total
## <chr> <int>
## 1 Buenos Aires 803344
## 2 Santa Fe 199847
## 3 CABA 189521
## 4 CĂ³rdoba 167376
## 5 TucumĂ¡n 117211
## 6 Mendoza 73765
## 7 Neuquén 49196
## 8 RĂo Negro 43299
## 9 Chubut 39422
## 10 Entre RĂos 37620
## # … with 15 more rows
# Read the first-level administrative boundaries of Argentina from the
# "ARG_adm" directory, decoding attribute text as UTF-8 and keeping strings
# as plain character vectors.
argentina <- readOGR(
  dsn = "ARG_adm",
  layer = "ARG_adm1",
  use_iconv = TRUE,
  encoding = "UTF-8",
  stringsAsFactors = FALSE,
  verbose = FALSE
)

# Total cases per province of residence, without the "SIN ESPECIFICAR"
# (unspecified) bucket. NAME_1 is the join key: it copies the province name,
# except that "CABA" is spelled out as "Ciudad de Buenos Aires" (presumably the
# spelling used in the shapefile's NAME_1 column -- verify against the layer).
cases_by_province <- cases %>%
  group_by(residencia_provincia_nombre) %>%
  summarise(total = n()) %>%
  filter(residencia_provincia_nombre != "SIN ESPECIFICAR") %>%
  mutate(
    NAME_1 = if_else(
      residencia_provincia_nombre == "CABA",
      "Ciudad de Buenos Aires",
      residencia_provincia_nombre
    )
  )

# Attach the totals to the polygons' attribute table. A left join keeps every
# polygon row; provinces without a matching name end up with total = NA.
argentina@data <- left_join(argentina@data, cases_by_province, by = "NAME_1")
# HTML popup shown when a polygon is clicked: province name and case count.
state_popup <- paste0(
  "<strong>Estado: </strong>",
  argentina@data$NAME_1,
  "<br><strong>Casos: </strong>",
  argentina@data$total
)

# Five-class quantile colour scale on the yellow-green palette. The domain is
# left as NULL, so the breaks are derived from the values passed to pal().
pal <- colorQuantile(palette = "YlGn", domain = NULL, n = 5)

# Choropleth of total cases per province over a light CartoDB basemap.
argentina %>%
  leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~ pal(total),
    fillOpacity = 0.8,
    color = "#BDBDC3",
    weight = 1,
    popup = state_popup
  )